/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.tests;

import java.util.ArrayList;
import java.util.List;

import junit.framework.TestCase;
import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.GeneralizedString;
import mosdi.fa.GeneralizedStringsNFA;
import mosdi.fa.NFA;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;
import mosdi.util.iterators.IupacPatternGenerator;

/**
 * JUnit 3 tests for {@link DFAFactory}: equality of minimized automata built
 * from sets of IUPAC patterns, and match counting of automata built from
 * IUPAC patterns on fixed DNA sequences.
 */
public class DFAFactoryTest extends TestCase{

	static final Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();

	/**
	 * Third argument passed to {@code DFAFactory.build} in the equality checks.
	 * NOTE(review): presumably an upper bound on the automaton size - confirm against DFAFactory.
	 */
	private static final int EQUALITY_BUILD_LIMIT = 1000000;

	/**
	 * Third argument passed to {@code DFAFactory.build} in the match-counting checks.
	 * NOTE(review): presumably an upper bound on the automaton size - confirm against DFAFactory.
	 */
	private static final int COUNTING_BUILD_LIMIT = 50000;

	/**
	 * Converts the given IUPAC patterns to generalized strings, wraps them in a
	 * {@link GeneralizedStringsNFA}, builds the automaton over the DNA alphabet
	 * and returns its minimized form.
	 */
	private static CDFA minimalDfa(String... iupacPatterns) {
		List<GeneralizedString> strings = new ArrayList<GeneralizedString>(iupacPatterns.length);
		for (String iupacPattern : iupacPatterns) {
			strings.add(Iupac.toGeneralizedString(iupacPattern));
		}
		return DFAFactory.build(dnaAlphabet, new GeneralizedStringsNFA(strings), EQUALITY_BUILD_LIMIT).minimize();
	}

	/**
	 * Builds the automaton for {@code iupacPattern} twice and checks that it
	 * reports {@code expected} matches on both the String and the index-array
	 * form of the same sequence.
	 */
	private static void assertIupacMatchCount(int expected, String iupacPattern, boolean flag, String sequence, int[] intSequence) {
		final String label = "pattern " + iupacPattern + ", flag=" + flag;
		assertEquals(label + " (String input)", expected, DFAFactory.buildFromIupacPattern(iupacPattern, flag).countMatches(sequence));
		assertEquals(label + " (int[] input)", expected, DFAFactory.buildFromIupacPattern(iupacPattern, flag).countMatches(intSequence));
	}

	/**
	 * Hand-picked pairs of pattern sets whose minimized automata must, or must
	 * not, compare equal via {@code CDFA.equals}. The letter expansions quoted
	 * in the comments follow the IUPAC nucleotide code.
	 */
	public static void testEqualMethodManual() {
		// M = A|C: {AC, AM} only covers words starting with A, MM also covers words starting with C.
		assertFalse("{AC, AM} must differ from {MM}",
				minimalDfa("AC", "AM").equals(minimalDfa("MM")));

		// V = A|C|G: VVCT covers more than AVCT, so the two sets differ.
		assertFalse("{AGCT, VVCT} must differ from {AGCT, AVCT}",
				minimalDfa("AGCT", "VVCT").equals(minimalDfa("AGCT", "AVCT")));

		// Two strings differing only in the last position merge into one IUPAC string (Y = C|T).
		assertTrue("{AGT, AGC} must equal {AGY}",
				minimalDfa("AGT", "AGC").equals(minimalDfa("AGY")));

		// Two strings differing only in the middle position merge into one IUPAC string (K = G|T).
		assertTrue("{ATC, AGC} must equal {AKC}",
				minimalDfa("ATC", "AGC").equals(minimalDfa("AKC")));

		// Every word of the second set is contained in the first set, but the sets are not the same.
		assertFalse("{ASW, CCT} must differ from {MCT}",
				minimalDfa("ASW", "CCT").equals(minimalDfa("MCT")));

		// Naive position-wise union of two unrelated strings accepts more words than the two strings themselves.
		assertFalse("{ACG, CTA} must differ from {MYR}",
				minimalDfa("ACG", "CTA").equals(minimalDfa("MYR")));
	}

	/**
	 * Checks the match counts of automata built by
	 * {@code DFAFactory.buildFromIupacPattern} on a fixed sequence, for both
	 * values of the boolean argument.
	 * NOTE(review): the boolean presumably toggles matching of the reverse
	 * complement as well - confirm against DFAFactory.
	 */
	public void testCDFAfromIupac() {
		final String sequence = "ATTGCGATGTTGGAGACTCTCAGGGTGCGAAAGCTGCTCAACCTTTCTAATCTCCGTTGGCCGAACAGCTGTCCACAGCGTTGACGTATCCCCGAAATGC";
		final int[] intSequence = dnaAlphabet.buildIndexArray(sequence);
		assertIupacMatchCount(3, "MCCN", false, sequence, intSequence);
		assertIupacMatchCount(5, "MCCN", true, sequence, intSequence);
		assertIupacMatchCount(1, "ACGT", false, sequence, intSequence);
		assertIupacMatchCount(2, "ACGT", true, sequence, intSequence);
	}

	/**
	 * Builds an automaton directly from a list holding one generalized string,
	 * minimizes it with {@code minimizeHopcroft} and checks the number of
	 * matches on a fixed sequence.
	 */
	public void testCDFAfromIupac2() {
		final String sequence = "AGCGGCGCGCTGGGGCTGGAAGCGTTATCGCGCTACGCTGCGGGGGCAACGTTGATTGAGATGGATCGCGCGGTTTCTCAGCAGTTAATTAAGAATCTGGCGACACTAAAAGCAGGCAATGCACGCGTGGTGAACAGCAACGCGATGTCATTCCTGGCGCAAAAAGGTACACCGCATAATATCGTGTTTGTCGATCCACCGTTCCGCCGTGGCTTGTTAGAAGAGACGATAAATTTACTGGAAGATAACGGCTGGCTGGCTGACGAAGCCCTGATTTATGTCGAAAGCGAAGTCGAAAACGGTCTGCCCACTGTTCCAGCAAACTGGTCATTACATCGGGAAAAAGTGGCGGGTCAGGTGGCTTATCGGCTGTATCAACGCGAAGCACAAGGAGAAAGTG";
		final int[] intSequence = dnaAlphabet.buildIndexArray(sequence);
		final String iupacPattern = "RRRGAGANNR";
		List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
		genStringList.add(Iupac.toGeneralizedString(iupacPattern));
		CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, COUNTING_BUILD_LIMIT).minimizeHopcroft();
		assertEquals("matches of " + iupacPattern, 2, cdfa.countMatches(intSequence));
	}

	/**
	 * For every pattern produced by an {@link IupacPatternGenerator} of length 4,
	 * checks that the minimized automaton counts the same number of matches on a
	 * fixed sequence as the NFA it was built from.
	 * NOTE(review): the {@code true} arguments given to IupacStringConstraints and
	 * Iupac.toGeneralizedStrings are not explained here - confirm their meaning
	 * against those classes.
	 */
	public void testCDFAfromIupac3() {
		final Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
		final int[] sequence = dnaAlphabet.buildIndexArray("AGCGGCGCGCTGGGGCTGGAAGCGTTATCGCGCTACGCTGCGGGGGCAACGTTGATTGAGATGGATCGCGCGGTTTCTCAGCAGTTAATTAAGAATCTGGCGACACTAAAAGCAGGCAATGCACGCGTGGTGAACAGCAACGCGATGTCATTCCTGGCGCAAAAAGGTACACCGCATAATATCGTGTTTGTCGATCCACCGTTCCGCCGTGGCTTGTTAGAAGAGACGATAAATTTACTGGAAGATAACGGCTGGCTGGCTGACGAAGCCCTGATTTATGTCGAAAGCGAAGTCGAAAACGGTCTGCCCACTGTTCCAGCAAACTGGTCATTACATCGGGAAAAAGTGGCGGGTCAGGTGGCTTATCGGCTGTATCAACGCGAAGCACAAGGAGAAAGTG");
		final IupacStringConstraints constraints = new IupacStringConstraints(4, true);
		for (int[] pattern : new IupacPatternGenerator(4,constraints)) {
			// Keep the readable form of the pattern so a failing assertion names the culprit.
			final String patternString = iupacAlphabet.buildString(pattern);
			List<GeneralizedString> generalizedStrings = Iupac.toGeneralizedStrings(patternString, true);
			NFA nfa = new GeneralizedStringsNFA(generalizedStrings);
			CDFA minimalCdfa = DFAFactory.build(dnaAlphabet, nfa, COUNTING_BUILD_LIMIT).minimize();
			assertEquals("NFA and minimized DFA disagree for pattern " + patternString,
					nfa.countMatches(sequence), minimalCdfa.countMatches(sequence));
		}
	}

}
